#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Main analysis
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

rm(list = ls())

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Packages, functions, parameters ####
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

## Packages ----

# packages: Core and functional packages
library(tidyverse)

# packages: Estimation and results display
library(multiwayvcov)
library(lmtest)
library(stargazer)


## Functions ----

# function `trendplot': Plot for logged detainers 
trendplot <- function(var, ylab) {
  # get treated means 
  t <- data.frame(y = tapply(treat.set[,var], treat.set$t, mean, na.rm = T))
  t$x <- rownames(t)
  t$treat <- "t"
  
  # get control means 
  c <- data.frame(y = tapply(control.set[,var], control.set$t, mean, na.rm = T))
  c$x <- rownames(c)
  c$treat <- "c"
  
  # get means for all others
  o <- data.frame(y = tapply(all.others[,var], all.others$t, mean, na.rm = T))
  o$x <- rownames(o)
  o$treat <- "o"
  
  # put them together
  toplot <- rbind(t, c, o)
  
  # plot
  toplot$treat <- factor(toplot$treat, levels = c("o", "c", "t"))
  toplot$x <- as.numeric(as.character(toplot$x))
  toplot <- toplot[toplot$x %in% c(-5:4),]
  
  g <- ggplot(data = toplot, aes(x = x, y = y, colour = treat, group = treat)) + 
    geom_point() + 
    geom_smooth(data = toplot[toplot$x<0,], method = "loess", se = FALSE) + 
    geom_smooth(data = toplot[toplot$x>=0,], method = "loess", se = FALSE) + 
    geom_vline(xintercept = 0) + 
    theme_minimal() + 
    xlab("Years to 287(g) entry/rejection") + 
    ylab(ylab) + 
    xlim(-5,4) + 
    theme(legend.position="bottom") + 
    labs(color = "") +
    scale_color_manual(labels = c("All Other Counties", "Control", "Treated"),
                       values = c("o" = "gray70", "c" = "gray30", "t" = "red3")) + 
    scale_x_discrete(limits = c(-5:4))
  print(g)
}


## Global parameters ----

# set seed
set.seed(123)

# set time zone (to avoid lubridate errors)
Sys.setenv(TZ = "America/New_York")

# define colors 
red_mit = '#A31F34'
red_light = '#A9606C'
blue_mit = '#315485'
grey_light= '#C2C0BF'
grey_dark = '#8A8B8C'
black = '#353132'



#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Data and analysis variables ####
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


## Data ----

# load data 
full <- read.csv("full_y.csv") %>%
  select(
    year, date1, sc, denied, treat, control,
    pop, total_crimes, pct_dem, prop_foreign, unemp_imp,
    ice_custody, ndetainees, ndetainees_county, ndetainees_federal,
    ndetainees_ice, ndetainees_local, ndetainees_state,
    ncharged, ncharged_low, ncharged_med, ncharged_high
  )


## Variables ----

# logged controls
full$pop_log <- log(full$pop + 1) 

# crime rate per thousand citizens 
full$crime_rate <- full$total_crimes/(full$pop/1000)

### outcomes 

# logged detainer requests
full$log_detainees <- log(full$ndetainees + 1)
full$log_detainees_county <- log(full$ndetainees_county + 1)
full$log_detainees_federal <- log(full$ndetainees_federal + 1)
full$log_detainees_ice <- log(full$ndetainees_ice + 1)
full$log_detainees_local <- log(full$ndetainees_local + 1)
full$log_detainees_state <- log(full$ndetainees_state + 1)
full$log_detainees_noncounty <- log(full$ndetainees_federal + full$ndetainees_ice + full$ndetainees_local + full$ndetainees_state + 1)

full$log_nocharge <- log(full$ndetainees - full$ncharged + 1)
full$log_ncharged <- log(full$ncharged + 1)
full$log_ncharged_high <- log(full$ncharged_high + 1)
full$log_ncharged_med <- log(full$ncharged_med + 1)
full$log_ncharged_low <- log(full$ncharged_low + 1)

# get rid of observations with missing treatment status 
full <- full[!is.na(full$treat),]



#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Treatment/Control sets ####
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

## Treatment/Control sets ----

# Fig 1a vars: 
# - From `full': year, date, sc, treat, control, log_detainees
# - From sets:   t, c, o, x

# construct treatment set
all_treats <- unique(full$sc[full$treat==1])
treats <- unique(full$sc[full$treat==1])
full$year_signed <- as.numeric(substr(full$date1, 1, 4))

treat.set <- NULL
for (i in c(1:length(treats))) {
  t <- min(full$year[full$sc==treats[i] & full$treat==1])
  start <- t - 10
  end <- t + 10
  temp <- full[full$sc==treats[i] & full$year>=start & full$year<=end,]
  temp <- temp[order(temp$year),]
  temp$t <- temp$year - t
  treat.set <- rbind(treat.set, temp)
}

# construct control set
controls <- unique(full$sc[full$control==1])
# get rid of two controls that ended up being treated later
controls <- controls[!controls %in% all_treats]

control.set <- NULL
for (i in c(1:length(controls))) {
  t <- min(full$year[full$sc==controls[i] & full$denied==1])
  start <- t - 10
  end <- t + 10
  temp <- full[full$sc==controls[i] & full$year>=start & full$year<=end,]
  temp <- temp[order(temp$year),]
  temp$t <- temp$year - t
  control.set <- rbind(control.set, temp)
}

# everyone else
others <- unique(full$sc[!full$sc %in% c(treats, controls)])
all.others <- NULL
for (i in c(1:length(treats))) {
  t <- min(full$year[full$sc==treats[i] & full$treat==1])
  start <- t - 10
  end <- t + 10
  temp <- full[!full$sc %in% c(treats, controls) & full$year>=start & full$year<=end,]
  temp$t <- temp$year - t
  all.others <- rbind(all.others, temp)
}


# Fig 1b vars: 
# - From `full': year, date, sc, treat, control, detainers, ndetainees, log_detainees, ice_custody
# - From sets:   t, c, o, x

# Plot trends for shares 
# get treated sums 
t <- data.frame(detainers = tapply(treat.set$ndetainees, treat.set$t, sum, na.rm = T),
                ice_custody = tapply(treat.set$ice_custody, treat.set$t, sum, na.rm = T))
t$y <- t$ice_custody/t$detainers
t$x <- rownames(t)
t$treat <- "t"

# get control sums 
c <- data.frame(detainers = tapply(control.set$ndetainees, control.set$t, sum, na.rm = T),
                ice_custody = tapply(control.set$ice_custody, control.set$t, sum, na.rm = T))  
c$y <- c$ice_custody/c$detainers
c$x <- rownames(c)
c$treat <- "c"

# get sums for all others
o <- data.frame(detainers = tapply(all.others$ndetainees, all.others$t, sum, na.rm = T),
                ice_custody = tapply(all.others$ice_custody, all.others$t, sum, na.rm = T))  
o$y <- o$ice_custody/o$detainers
o$x <- rownames(o)
o$treat <- "o"

# put them together
toplot <- rbind(t, c, o)

# plot
toplot$treat <- factor(toplot$treat, levels = c("o", "c", "t"))
toplot$x <- as.numeric(as.character(toplot$x))
toplot <- toplot[toplot$x %in% c(-5:4),]



#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Figure 1(a, b) ####
# Plot pretreatment trends in outcomes 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# Plot 1a 
pdf(file = "pretrend_logged_detainers.pdf", width = 5, height = 5)
trendplot(var = "log_detainees", ylab = "Logged detainers") 
dev.off()


# Plot 1b
pdf(file = "pretrend_coop_rate.pdf", width = 5, height = 5)
ggplot(data = toplot, aes(x = x, y = y, colour = treat, group = treat)) + 
  geom_point() + 
  geom_smooth(data = toplot[toplot$x<0,], method = "loess", se = FALSE) + 
  geom_smooth(data = toplot[toplot$x>=0,], method = "loess", se = FALSE) + 
  geom_vline(xintercept = 0) + 
  theme_minimal() + 
  xlab("Years to 287(g) entry/rejection") + 
  ylab("Share of detainers transferred to ICE custody") + 
  xlim(-5,4) + 
  theme(legend.position="bottom") + 
  labs(color = "") +
  scale_color_manual(labels = c("All Other Counties", "Control", "Treated"),
                     values = c("o" = "gray70", "c" = "gray30", "t" = "red3")) + 
  scale_x_discrete(limits = c(-5:4))
dev.off()



#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Regression tables ####
# Estimate main effects: diff-in-diff 
# estimates of joining the program 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

all <- rbind(treat.set, control.set)

# First table: logged detainers, disaggregated by facility type, focusing on sheriff signings
lm1 <- lm(log_detainees ~ treat + pct_dem + prop_foreign + pop_log + crime_rate + unemp_imp + as.factor(sc) + as.factor(year), data = all)
lm1_county <- lm(log_detainees_county ~ treat + pct_dem + prop_foreign + pop_log + crime_rate + unemp_imp + as.factor(sc) + as.factor(year), data = all)
lm1_noncounty <- lm(log_detainees_noncounty ~ treat + pct_dem + prop_foreign + pop_log + crime_rate + unemp_imp + as.factor(sc) + as.factor(year), data = all)
lm1_federal <- lm(log_detainees_federal ~ treat + pct_dem + prop_foreign + pop_log + crime_rate + unemp_imp + as.factor(sc) + as.factor(year), data = all)
lm1_ice <- lm(log_detainees_ice ~ treat + pct_dem + prop_foreign + pop_log + crime_rate + unemp_imp + as.factor(sc) + as.factor(year), data = all)
lm1_local <- lm(log_detainees_local ~ treat + pct_dem + prop_foreign + pop_log + crime_rate + unemp_imp + as.factor(sc) + as.factor(year), data = all)
lm1_state <- lm(log_detainees_state ~ treat + pct_dem + prop_foreign + pop_log + crime_rate + unemp_imp + as.factor(sc) + as.factor(year), data = all)

# Cluster standard errors on county 
se.lm1 <- coeftest(lm1, vcov = cluster.vcov(lm1, all$sc))
se.lm1_county <- coeftest(lm1_county, vcov = cluster.vcov(lm1_county, all$sc))
se.lm1_noncounty <- coeftest(lm1_noncounty, vcov = cluster.vcov(lm1_noncounty, all$sc))
se.lm1_federal <- coeftest(lm1_federal, vcov = cluster.vcov(lm1_federal, all$sc))
se.lm1_ice <- coeftest(lm1_ice, vcov = cluster.vcov(lm1_ice, all$sc))
se.lm1_local <- coeftest(lm1_local, vcov = cluster.vcov(lm1_local, all$sc))
se.lm1_state <- coeftest(lm1_state, vcov = cluster.vcov(lm1_state, all$sc))

# Create table 
stargazer(lm1, lm1_county, lm1_noncounty, 
          keep.stat = c("n", "rsq"),
          keep = c("treat", "pct_dem", "prop_foreign", "pop_log", "crime_rate", "unemp_imp"),
          order = c("treat", "pct_dem", "prop_foreign", "pop_log", "crime_rate", "unemp_imp"),
          se = list(se.lm1[,2], se.lm1_county[,2], se.lm1_noncounty[,2]), #se.lm1_federal[,2], se.lm1_ice[,2], se.lm1_local[,2], se.lm1_state[,2]),
          p = list(se.lm1[,4], se.lm1_county[,4], se.lm1_noncounty[,2]), #se.lm1_federal[,4], se.lm1_ice[,4], se.lm1_local[,4], se.lm1_state[,4]),
          intercept.top = FALSE, intercept.bottom = TRUE,
          title = "Effect of 287(g) Participation on Logged Detainers",
          covariate.labels = c("287(g) participation", "Democratic vote share", "Proportion foreign-born", "Logged population", "Crime rate per thousand", "Unemployment rate"),
          align = TRUE, type = "text",
          dep.var.labels.include = F,
          column.labels = c("\\textit{All}", "\\textit{County}", "\\textit{Non-County}"), #, "\\textit{Federal}", "\\textit{ICE}", "\\textit{Local}", "\\textit{State}"),
          dep.var.caption = "",
          star.cutoffs = c(.05, .01, .001),
          no.space = TRUE,
          out = "main_effects.tex", 
          add.lines = list(c("County FE", "\\mc{Yes}", "\\mc{Yes}", "\\mc{Yes}", "\\mc{Yes}", "\\mc{Yes}", "\\mc{Yes}", "\\mc{Yes}", "\\mc{Yes}"),
                           c("Year FE", "\\mc{Yes}", "\\mc{Yes}", "\\mc{Yes}", "\\mc{Yes}", "\\mc{Yes}", "\\mc{Yes}", "\\mc{Yes}", "\\mc{Yes}")))

# average outcome among treated units, pre-treatment
mean(treat.set$ndetainees[treat.set$treat==0])
mean(treat.set$ndetainees_county[treat.set$treat==0])
mean(treat.set$ndetainees[treat.set$treat==0] - treat.set$ndetainees_county[treat.set$treat==0])

mean(treat.set$ndetainees[treat.set$treat==0] - treat.set$ncharged[treat.set$treat==0])
mean(treat.set$ncharged_low[treat.set$treat==0])
mean(treat.set$ncharged_med[treat.set$treat==0])
mean(treat.set$ncharged_high[treat.set$treat==0])


# Second table: by past criminal charge 
lm2_1 <- lm(log_detainees ~ treat + pct_dem + prop_foreign + pop_log + crime_rate + unemp_imp + as.factor(sc) + as.factor(year), data = all)
lm2_2 <- lm(log_nocharge ~ treat + pct_dem + prop_foreign + pop_log + crime_rate + unemp_imp + as.factor(sc) + as.factor(year), data = all)
lm2_3 <- lm(log_ncharged_low ~ treat + pct_dem + prop_foreign + pop_log + crime_rate + unemp_imp + as.factor(sc) + as.factor(year), data = all)
lm2_4 <- lm(log_ncharged_med ~ treat + pct_dem + prop_foreign + pop_log + crime_rate + unemp_imp + as.factor(sc) + as.factor(year), data = all)
lm2_5 <- lm(log_ncharged_high ~ treat + pct_dem + prop_foreign + pop_log + crime_rate + unemp_imp + as.factor(sc) + as.factor(year), data = all)

# Cluster standard errors on county 
se.lm2_1 <- coeftest(lm2_1, vcov = cluster.vcov(lm2_1, all$sc))
se.lm2_2 <- coeftest(lm2_2, vcov = cluster.vcov(lm2_2, all$sc))
se.lm2_3 <- coeftest(lm2_3, vcov = cluster.vcov(lm2_3, all$sc))
se.lm2_4 <- coeftest(lm2_4, vcov = cluster.vcov(lm2_4, all$sc))
se.lm2_5 <- coeftest(lm2_5, vcov = cluster.vcov(lm2_5, all$sc))

stargazer(lm2_2, lm2_3, lm2_4, lm2_5,
          keep.stat = c("n", "rsq"),
          keep = c("treat", "pct_dem", "prop_foreign", "pop_log", "crime_rate", "unemp_imp"),
          order = c("treat", "pct_dem", "prop_foreign", "pop_log", "crime_rate", "unemp_imp"),
          se = list(se.lm2_2[,2], se.lm2_3[,2], se.lm2_4[,2], se.lm2_5[,2]),
          p = list(se.lm2_2[,4], se.lm2_3[,4], se.lm2_4[,4], se.lm2_5[,4]),
          intercept.top = FALSE, intercept.bottom = TRUE,
          title = "Effect of 287(g) Participation on Logged Detaineer, by Prior Criminal History",
          covariate.labels = c("287(g) participation", "Democratic vote share", "Proportion foreign-born", "Logged population", "Crime rate per thousand", "Unemployment rate"),
          align = TRUE, type = "text",
          dep.var.labels.include = F,
          column.labels = c("\\textit{No Charge}", "\\textit{Low}", "\\textit{Med}", "\\textit{High}"),
          dep.var.caption = "",
          star.cutoffs = c(.05, .01, .001),
          no.space = TRUE,
          out = "effects_policing.tex", 
          add.lines = list(c("County FE", "\\mc{Yes}", "\\mc{Yes}", "\\mc{Yes}", "\\mc{Yes}", "\\mc{Yes}", "\\mc{Yes}", "\\mc{Yes}", "\\mc{Yes}"),
                           c("Year FE", "\\mc{Yes}", "\\mc{Yes}", "\\mc{Yes}", "\\mc{Yes}", "\\mc{Yes}", "\\mc{Yes}", "\\mc{Yes}", "\\mc{Yes}")))


